#include <linux/ethtool.h>
#include <linux/in.h>
#include <linux/if_ether.h>
+#include <linux/io.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/arp.h>
#include <net/route.h>
-#include <asm/io.h>
#include <asm/uaccess.h>
#include <xen/evtchn.h>
#include <xen/xenbus.h>
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
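+/*
+ * Responses of up to this many bytes are copied into the skb's linear
+ * data area; anything larger is left in the page transferred from the
+ * backend, which stays attached to the skb as fragment 0.
+ */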
+#define RX_COPY_THRESHOLD 256
+
#define GRANT_INVALID_REF 0
#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
-static inline void init_skb_shinfo(struct sk_buff *skb)
-{
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->frag_list = NULL;
-}
-
struct netfront_info {
struct list_head list;
struct net_device *netdev;
goto abort_transaction;
}
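+ /* Advertise scatter-gather support: with it, the backend may pass
+ * us packets spanning multiple fragment pages. */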
+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+ if (err) {
+ message = "writing feature-sg";
+ goto abort_transaction;
+ }
+
err = xenbus_transaction_end(xbt, 0);
if (err) {
if (err == -EAGAIN)
unsigned short id;
struct netfront_info *np = netdev_priv(dev);
struct sk_buff *skb;
+ struct page *page;
int i, batch_target, notify;
RING_IDX req_prod = np->rx.req_prod_pvt;
struct xen_memory_reservation reservation;
grant_ref_t ref;
+ unsigned long pfn;
+ void *vaddr;
if (unlikely(!netif_carrier_ok(dev)))
return;
*/
batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
- /*
- * Subtract dev_alloc_skb headroom (16 bytes) and shared info
- * tailroom then round down to SKB_DATA_ALIGN boundary.
- */
- skb = __dev_alloc_skb(
- ((PAGE_SIZE - sizeof(struct skb_shared_info)) &
- (-SKB_DATA_ALIGN(1))) - 16,
- GFP_ATOMIC|__GFP_NOWARN);
- if (skb == NULL) {
+ /* Allocate an skb and a page. */
+ skb = __dev_alloc_skb(RX_COPY_THRESHOLD,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ goto no_skb;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (!page) {
+ kfree_skb(skb);
+no_skb:
/* Any skbuffs queued for refill? Force them out. */
if (i != 0)
goto refill;
jiffies + (HZ/10));
break;
}
+
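+ /* Attach the page as fragment 0. On receive, netif_poll() copies
+ * at most RX_COPY_THRESHOLD bytes into the linear area and leaves
+ * the remainder in this page. */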
+ skb_shinfo(skb)->frags[0].page = page;
+ skb_shinfo(skb)->nr_frags = 1;
__skb_queue_tail(&np->rx_batch, skb);
}
ref = gnttab_claim_grant_reference(&np->gref_rx_head);
BUG_ON((signed short)ref < 0);
np->grant_rx_ref[id] = ref;
+
+ pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
+ vaddr = page_address(skb_shinfo(skb)->frags[0].page);
+
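+ /* Transfer the fragment page, not skb->head, to the backend. */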
gnttab_grant_foreign_transfer_ref(ref,
- np->xbdev->otherend_id,
- __pa(skb->head)>>PAGE_SHIFT);
+ np->xbdev->otherend_id, pfn);
RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
- np->rx_pfn_array[i] = virt_to_mfn(skb->head);
+ np->rx_pfn_array[i] = pfn_to_mfn(pfn);
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* Remove this page before passing back to Xen. */
- set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT,
- INVALID_P2M_ENTRY);
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
MULTI_update_va_mapping(np->rx_mcl+i,
- (unsigned long)skb->head,
+ (unsigned long)vaddr,
__pte(0), 0);
}
}
np->rx.req_prod_pvt++;
}
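+/*
+ * Consume the responses making up one packet (the first response plus
+ * any NETRXF_more_data fragments): end each foreign page transfer,
+ * queue the per-fragment skbs on 'list' and, when not running on an
+ * auto-translated physmap, build the remap multicalls and M2P updates
+ * at np->rx_mcl/np->rx_mmu starting at index 'count'.
+ * Returns 0 or a negative errno.
+ */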
+static int xennet_get_responses(struct netfront_info *np,
+ struct netif_rx_response *rx, RING_IDX rp,
+ struct sk_buff_head *list, int count)
+{
+ struct mmu_update *mmu = np->rx_mmu + count;
+ struct multicall_entry *mcl = np->rx_mcl + count;
+ RING_IDX cons = np->rx.rsp_cons;
+ struct sk_buff *skb = xennet_get_rx_skb(np, cons);
+ grant_ref_t ref = xennet_get_rx_ref(np, cons);
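+ /* If the first response fits within RX_COPY_THRESHOLD it is copied
+ * out of fragment 0 entirely, freeing that slot for one extra frag. */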
+ int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
+ int frags = 1;
+ int err = 0;
+
+ for (;;) {
+ unsigned long mfn;
+
+ if (unlikely(rx->status < 0 ||
+ rx->offset + rx->status > PAGE_SIZE)) {
+ if (net_ratelimit())
+ WPRINTK("rx->offset: %x, size: %d\n",
+ rx->offset, rx->status);
+ err = -EINVAL;
+ }
+
+ /*
+ * This definitely indicates a bug, either in this driver or in
+ * the backend driver. In future this should flag the bad
+ * situation to the system controller to reboot the backend.
+ */
+ if (ref == GRANT_INVALID_REF) {
+ WPRINTK("Bad rx response id %d.\n", rx->id);
+ err = -EINVAL;
+ goto next;
+ }
+
+ /* Memory pressure, insufficient buffer headroom, ... */
+ mfn = gnttab_end_foreign_transfer_ref(ref);
+ if (mfn == 0) {
+ if (net_ratelimit())
+ WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
+ rx->id, rx->status);
+ xennet_move_rx_slot(np, skb, ref);
+ err = -ENOMEM;
+ goto next;
+ }
+
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remap the page. */
+ struct page *page = skb_shinfo(skb)->frags[0].page;
+ unsigned long pfn = page_to_pfn(page);
+ void *vaddr = page_address(page);
+
+ MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
+ pfn_pte_ma(mfn, PAGE_KERNEL),
+ 0);
+ mcl++;
+ mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
+ | MMU_MACHPHYS_UPDATE;
+ mmu->val = pfn;
+ mmu++;
+
+ set_phys_to_machine(pfn, mfn);
+ }
+
+ __skb_queue_tail(list, skb);
+
+next:
+ if (!(rx->flags & NETRXF_more_data))
+ break;
+
+ if (cons + frags == rp) {
+ if (net_ratelimit())
+ WPRINTK("Need more frags\n");
+ err = -ENOENT;
+ break;
+ }
+
+ rx = RING_GET_RESPONSE(&np->rx, cons + frags);
+ skb = xennet_get_rx_skb(np, cons + frags);
+ ref = xennet_get_rx_ref(np, cons + frags);
+ frags++;
+ }
+
+ if (unlikely(frags > max)) {
+ if (net_ratelimit())
+ WPRINTK("Too many frags\n");
+ err = -E2BIG;
+ }
+
+ return err;
+}
+
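+/*
+ * Attach the fragment pages queued on 'list' to 'skb', consuming one
+ * RX response per fragment. Returns the ring index of the last
+ * response consumed, from which the caller advances rsp_cons.
+ */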
+static RING_IDX xennet_fill_frags(struct netfront_info *np,
+ struct sk_buff *skb,
+ struct sk_buff_head *list)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ RING_IDX cons = np->rx.rsp_cons;
+ skb_frag_t *frag = shinfo->frags + nr_frags;
+ struct sk_buff *nskb;
+
+ while ((nskb = __skb_dequeue(list))) {
+ struct netif_rx_response *rx =
+ RING_GET_RESPONSE(&np->rx, ++cons);
+
+ frag->page = skb_shinfo(nskb)->frags[0].page;
+ frag->page_offset = rx->offset;
+ frag->size = rx->status;
+
+ skb->data_len += rx->status;
+
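+ /* The page now belongs to 'skb'; clear nr_frags so that freeing
+ * nskb does not release it. */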
+ skb_shinfo(nskb)->nr_frags = 0;
+ kfree_skb(nskb);
+
+ frag++;
+ nr_frags++;
+ }
+
+ shinfo->nr_frags = nr_frags;
+ return cons;
+}
+
static int netif_poll(struct net_device *dev, int *pbudget)
{
struct netfront_info *np = netdev_priv(dev);
- struct sk_buff *skb, *nskb;
+ struct sk_buff *skb;
struct netif_rx_response *rx;
RING_IDX i, rp;
- struct mmu_update *mmu = np->rx_mmu;
- struct multicall_entry *mcl = np->rx_mcl;
+ struct multicall_entry *mcl;
int work_done, budget, more_to_do = 1;
struct sk_buff_head rxq;
+ struct sk_buff_head errq;
+ struct sk_buff_head tmpq;
unsigned long flags;
- unsigned long mfn;
- grant_ref_t ref;
+ unsigned int len;
+ int pages_done;
+ int err;
spin_lock(&np->rx_lock);
}
skb_queue_head_init(&rxq);
+ skb_queue_head_init(&errq);
+ skb_queue_head_init(&tmpq);
if ((budget = *pbudget) > dev->quota)
budget = dev->quota;
rp = np->rx.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for (i = np->rx.rsp_cons, work_done = 0;
+ for (i = np->rx.rsp_cons, work_done = 0, pages_done = 0;
(i != rp) && (work_done < budget);
- i++, work_done++) {
+ np->rx.rsp_cons = ++i, work_done++) {
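+ /* rsp_cons must be kept in step with i: xennet_get_responses()
+ * and xennet_fill_frags() read it directly. */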
rx = RING_GET_RESPONSE(&np->rx, i);
- skb = xennet_get_rx_skb(np, i);
- ref = xennet_get_rx_ref(np, i);
+ err = xennet_get_responses(np, rx, rp, &tmpq, pages_done);
+ pages_done += skb_queue_len(&tmpq);
- /*
- * This definitely indicates a bug, either in this driver or in
- * the backend driver. In future this should flag the bad
- * situation to the system controller to reboot the backed.
- */
- if (ref == GRANT_INVALID_REF) {
- WPRINTK("Bad rx response id %d.\n", rx->id);
+ if (unlikely(err)) {
+ i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1;
work_done--;
+ while ((skb = __skb_dequeue(&tmpq)))
+ __skb_queue_tail(&errq, skb);
+ np->stats.rx_errors++;
continue;
}
- /* Memory pressure, insufficient buffer headroom, ... */
- if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
- if (net_ratelimit())
- WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
- rx->id, rx->status);
- xennet_move_rx_slot(np, skb, ref);
- work_done--;
- continue;
- }
+ skb = __skb_dequeue(&tmpq);
- gnttab_release_grant_reference(&np->gref_rx_head, ref);
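+ /* Stash the page pointer in skb->nh.raw and the packet offset in
+ * skb->h.raw; the flush loop below uses them to locate the head
+ * data when copying it into the linear area. */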
+ skb->nh.raw = (void *)skb_shinfo(skb)->frags[0].page;
+ skb->h.raw = skb->nh.raw + rx->offset;
+
+ len = rx->status;
+ if (len > RX_COPY_THRESHOLD)
+ len = RX_COPY_THRESHOLD;
+ skb_put(skb, len);
+
+ if (rx->status > len) {
+ skb_shinfo(skb)->frags[0].page_offset =
+ rx->offset + len;
+ skb_shinfo(skb)->frags[0].size = rx->status - len;
+ skb->data_len = rx->status - len;
+ } else {
+ skb_shinfo(skb)->frags[0].page = NULL;
+ skb_shinfo(skb)->nr_frags = 0;
+ }
- /* NB. We handle skb overflow later. */
- skb->data = skb->head + rx->offset;
- skb->len = rx->status;
- skb->tail = skb->data + skb->len;
+ i = xennet_fill_frags(np, skb, &tmpq);
+ skb->truesize += skb->data_len;
+ skb->len += skb->data_len;
/*
* Old backends do not assert data_validated but we
skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
np->stats.rx_packets++;
- np->stats.rx_bytes += rx->status;
-
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /* Remap the page. */
- MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
- pfn_pte_ma(mfn, PAGE_KERNEL),
- 0);
- mcl++;
- mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
- | MMU_MACHPHYS_UPDATE;
- mmu->val = __pa(skb->head) >> PAGE_SHIFT;
- mmu++;
-
- set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT,
- mfn);
- }
+ np->stats.rx_bytes += skb->len;
__skb_queue_tail(&rxq, skb);
}
/* Some pages are no longer absent... */
- balloon_update_driver_allowance(-work_done);
+ balloon_update_driver_allowance(-pages_done);
/* Do all the remapping work, and M2P updates, in one big hypercall. */
- if (likely((mcl - np->rx_mcl) != 0)) {
+ if (likely(pages_done)) {
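+ /* xennet_get_responses() filled the first pages_done slots with
+ * va_mapping multicalls; append the batched M2P update as the
+ * final entry. */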
+ mcl = np->rx_mcl + pages_done;
mcl->op = __HYPERVISOR_mmu_update;
mcl->args[0] = (unsigned long)np->rx_mmu;
- mcl->args[1] = mmu - np->rx_mmu;
+ mcl->args[1] = pages_done;
mcl->args[2] = 0;
mcl->args[3] = DOMID_SELF;
- mcl++;
- (void)HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
+ (void)HYPERVISOR_multicall(np->rx_mcl, pages_done + 1);
}
- while ((skb = __skb_dequeue(&rxq)) != NULL) {
- if (skb->len > (dev->mtu + ETH_HLEN + 4)) {
- if (net_ratelimit())
- printk(KERN_INFO "Received packet too big for "
- "MTU (%d > %d)\n",
- skb->len - ETH_HLEN - 4, dev->mtu);
- skb->len = 0;
- skb->tail = skb->data;
- init_skb_shinfo(skb);
- dev_kfree_skb(skb);
- continue;
- }
+ while ((skb = __skb_dequeue(&errq)))
+ kfree_skb(skb);
- /*
- * Enough room in skbuff for the data we were passed? Also,
- * Linux expects at least 16 bytes headroom in each rx buffer.
- */
- if (unlikely(skb->tail > skb->end) ||
- unlikely((skb->data - skb->head) < 16)) {
- if (net_ratelimit()) {
- if (skb->tail > skb->end)
- printk(KERN_INFO "Received packet "
- "is %zd bytes beyond tail.\n",
- skb->tail - skb->end);
- else
- printk(KERN_INFO "Received packet "
- "is %zd bytes before head.\n",
- 16 - (skb->data - skb->head));
- }
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ struct page *page = (struct page *)skb->nh.raw;
+ void *vaddr = page_address(page);
- nskb = __dev_alloc_skb(skb->len + 2,
- GFP_ATOMIC|__GFP_NOWARN);
- if (nskb != NULL) {
- skb_reserve(nskb, 2);
- skb_put(nskb, skb->len);
- memcpy(nskb->data, skb->data, skb->len);
- /* Copy any other fields we already set up. */
- nskb->dev = skb->dev;
- nskb->ip_summed = skb->ip_summed;
- nskb->proto_data_valid = skb->proto_data_valid;
- nskb->proto_csum_blank = skb->proto_csum_blank;
- }
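+ /* Copy the head (at most RX_COPY_THRESHOLD bytes) out of the
+ * fragment page; h.raw - nh.raw recovers the backend's offset. */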
+ memcpy(skb->data, vaddr + (skb->h.raw - skb->nh.raw),
+ skb_headlen(skb));
- /* Reinitialise and then destroy the old skbuff. */
- skb->len = 0;
- skb->tail = skb->data;
- init_skb_shinfo(skb);
- dev_kfree_skb(skb);
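+ /* If the response was copied in full, frag 0 no longer references
+ * the page; release it here. */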
+ if (page != skb_shinfo(skb)->frags[0].page)
+ __free_page(page);
- /* Switch old for new, if we copied the buffer. */
- if ((skb = nskb) == NULL)
- continue;
- }
-
- /* Set the shinfo area, which is hidden behind the data. */
- init_skb_shinfo(skb);
/* Ethernet work: Delayed to here as it peeks the header. */
skb->protocol = eth_type_trans(skb, dev);
dev->last_rx = jiffies;
}
- np->rx.rsp_cons = i;
-
/* If we get a callback with very few responses, reduce fill target. */
/* NB. Note exponential increase, linear decrease. */
if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
gnttab_grant_foreign_transfer_ref(
ref, np->xbdev->otherend_id,
- __pa(skb->data) >> PAGE_SHIFT);
+ page_to_pfn(skb_shinfo(np->rx_skbs[i])->frags[0].page));
RING_GET_REQUEST(&np->rx, requeue_idx)->gref = ref;
RING_GET_REQUEST(&np->rx, requeue_idx)->id = requeue_idx;